By Kelly Chu (z5255293)
The project will use past population data to predict future mortality rates for Australia. This is a regression type problem with log mortality rates $log(m_x)$ regressed against calendar year $t$, age $x$ and gender to forecast mortality rates.
We begin by fitting a baseline Lee-Carter model before looking to how predictions can be improved with deep learning models.
from pathlib import Path
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import seaborn as sns
# import minmaxscaler
from sklearn.preprocessing import
from tensorflow.keras.utils import timeseries_dataset_from_array
from scipy.linalg import svd
from statsmodels.tsa.arima.model import ARIMA
import tensorflow as tf
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler, MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Input, Concatenate, Dropout, Embedding, Flatten, Dense, BatchNormalization, LSTM, GRU
import keras_tuner as kt
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
def plotPredictions(data, model, datasplit, G):
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,2, figsize=(15,10))
plt.suptitle(f"{country} {G} {model} Prediction on {datasplit} Set")
ax1.set_ylim(-9.5, 0)
ax1.set_xlim(-9.5, 0)
ax2.set_xlim(-9.5, 0)
ax2.set_ylim(-1.5, 1.5)
ax1.scatter(data['logmx'], data['pred_logmx'])
ax1.set_ylabel("Predictions")
ax1.set_xlabel("True values")
ax1.set_title(f"{datasplit} Predictions vs Actual")
xl = ax1.get_xlim()
yl = ax1.get_ylim()
shortestSide = min(xl[1], yl[1])
ax1.plot(xl, yl, color="black", linestyle="--")
sns.regplot(x = data['logmx'], y = data['residuals'], data = None, scatter = True, ax=ax2, truncate = False)
ax2.set_title(f"{model} {datasplit} Residuals vs log(mx)")
sns.regplot(x = data['Age'].astype('int'), y = data['residuals'], data = None, scatter = True, ax=ax3, truncate = False)
ax3.set_title(f"{model} {datasplit} Residuals vs Age")
sns.regplot(x = data['Year'].astype('int'), y = data['residuals'], data = None, scatter = True, ax=ax4, truncate = False)
ax4.set_title(f"{model} {datasplit} Residuals vs Year")
plt.savefig(f"plots/{country} {G} {model} Prediction on {datasplit} Set")
def residualsHeatmap (data, model, datasplit, gender):
compare = max(abs(data.residuals.min()), abs(data.residuals.max()))
minvalue = -compare
maxvalue = compare
# pivot data to acceptable form for heatmap
plot_data = pd.pivot_table(data, values = 'residuals', index = 'Age', columns = 'Year')
# plot heatmap
heatmap = sns.heatmap(data = plot_data,vmin = -1.5, vmax = 1.5, cmap = "RdBu_r").invert_yaxis()
# add title and labels
plt.title(f"{country} {gender} {model} {datasplit} Residuals Residuals")
plt.xlabel('Calendar Year t')
plt.ylabel('Age x')
plt.savefig(f"plots/{country} {gender} {model} {datasplit} Residuals Heatplot")
def plot_train_history(history, model):
loss = hist.history['loss']
val_loss = hist.history['val_loss']
epochs = range(len(loss))
plt.figure()
plt.plot(epochs, loss, label='Training loss')
plt.plot(epochs, val_loss, label='Validation loss')
plt.xlabel('epoch')
plt.ylabel('MSE Loss')
plt.title(f"{country} {model} Training History")
plt.legend()
plt.show()
plt.savefig(f"plots/{country} {model} Training History")
def nnPredictions(model, x_data, y_data):
y = y_data
pred = model.predict(x_data).flatten()
merge = pd.DataFrame({"pred_logmx": pred, "logmx": y})
merge['pred_mx'] = merge['pred_logmx'].apply(lambda x: np.exp(x))
merge['mx'] = merge['logmx'].apply(lambda x: np.exp(x))
merge['residuals'] = merge['pred_logmx'] - merge['logmx']
merge['loss'] = merge['residuals'] ** 2
return merge
The project uses the Australia population data set available on Human Mortality Database (HMD) - labelled as "AUS". https://www.mortality.org/File/GetDocument/hmd.v6/AUS/STATS/Mx_1x1.txt
country = 'AUS'
random.seed(17)
if not Path("Mx_1x1.txt").exists():
print("Downloading dataset")
!wget https://github.com/kchu1711/actl3143/blob/a1e55f150a0dedf6360d206f666b52f4425843d2/assignment/Mx_1x1.txt
all_mort = pd.read_csv('Mx_1x1.txt', skiprows= 2, sep = '\s+')
print(f"shape of DataFrame: {all_mort.shape}")
shape of DataFrame: (10989, 5)
all_mort.head()
| Year | Age | Female | Male | Total | |
|---|---|---|---|---|---|
| 0 | 1921 | 0 | 0.059987 | 0.076533 | 0.068444 |
| 1 | 1921 | 1 | 0.012064 | 0.014339 | 0.013225 |
| 2 | 1921 | 2 | 0.005779 | 0.006047 | 0.005916 |
| 3 | 1921 | 3 | 0.002889 | 0.004197 | 0.003554 |
| 4 | 1921 | 4 | 0.003254 | 0.003254 | 0.003254 |
# make data wider to longer
all_mort = all_mort.melt(id_vars=['Year','Age'], value_vars =['Female','Male'], var_name = "Gender", value_name='mx')
#change 110+ to 110
all_mort.loc[all_mort['Age'] == '110+', 'Age'] = '110'
#replace . or 0 with NA
all_mort.loc[all_mort['mx'] == '.', 'mx'] = np.nan
all_mort.loc[all_mort['mx'] == 0, 'mx'] = np.nan
#convert type
all_mort['Gender'] = all_mort['Gender'].astype('category')
all_mort['Age'] = all_mort['Age'].astype('int')
all_mort['mx'] = all_mort['mx'].astype('float')
all_mort.dtypes
Year int64 Age int32 Gender category mx float64 dtype: object
Pre-processing steps were then applied so that the data is in the proper format for our models. This includes:
#keep rows where year is within range and age is between 0 and 100
all_mort = all_mort[(all_mort['Age'] >= 0) & (all_mort['Age'] < 100)]
#log mx
all_mort['logmx'] = np.log(all_mort['mx'])
# set index to be equal to the Year
all_mort['t'] = pd.to_datetime(all_mort['Year'], format='%Y')
all_mort.set_index('t', inplace=True)
After cleaning, there are no null values and all features are of expected type.
# check for null values
all_mort.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 19800 entries, 1921-01-01 to 2019-01-01 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 19800 non-null int64 1 Age 19800 non-null int32 2 Gender 19800 non-null category 3 mx 19800 non-null float64 4 logmx 19800 non-null float64 dtypes: category(1), float64(2), int32(1), int64(1) memory usage: 715.6 KB
all_mort.head()
| Year | Age | Gender | mx | logmx | |
|---|---|---|---|---|---|
| t | |||||
| 1921-01-01 | 1921 | 0 | Female | 0.059987 | -2.813627 |
| 1921-01-01 | 1921 | 1 | Female | 0.012064 | -4.417529 |
| 1921-01-01 | 1921 | 2 | Female | 0.005779 | -5.153525 |
| 1921-01-01 | 1921 | 3 | Female | 0.002889 | -5.846845 |
| 1921-01-01 | 1921 | 4 | Female | 0.003254 | -5.727870 |
Select gender to investigate - Female, Male or Total
gender = 'Female'
gender_mort = all_mort[all_mort['Gender'] == gender].drop(columns = ['Gender'])
gender_mort.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 9900 entries, 1921-01-01 to 2019-01-01 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 9900 non-null int64 1 Age 9900 non-null int32 2 mx 9900 non-null float64 3 logmx 9900 non-null float64 dtypes: float64(2), int32(1), int64(1) memory usage: 348.0 KB
The below heatmap illustrates the relationship between age x and the calendar year t. From the colour scale, red and blue represents low and high mortality rates respectively.
We can observe the following patterns:
# find min value and max value to set the scale of the label to be consistent between male and female
minvalue = all_mort.logmx.min().min()
maxvalue = all_mort.logmx.max().max()
# pivot data to acceptable form for heatmap
plot_data = pd.pivot_table(gender_mort, values = 'logmx', index = 'Age', columns = 'Year')
# plot heatmap
heatmap = sns.heatmap(data = plot_data,vmin = minvalue, vmax = maxvalue, cmap = "RdBu_r", square=True).invert_yaxis()
# add title and labels
plt.title(f"{country} {gender} Log-Mortality Rates")
plt.xlabel('Calendar Year t')
plt.ylabel('Age x')
plt.savefig(f"plots/{country} {gender} Log-Mortality Rates")
# setup the normalization and the colormap
normalize = mcolors.Normalize(vmin=plot_data.columns.min(), vmax=plot_data.columns.max())
colormap = cm.RdBu
# plot
for n in plot_data.columns:
plt.plot(plot_data[n], color=colormap(normalize(n)))
# setup the colorbar
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array(plot_data.columns)
plt.colorbar(scalarmappaple)
# add axis labs
plt.title(f"{country} {gender} Log-Mortality Rates Over Calendar Years")
plt.xlabel('Age')
plt.ylabel('Log-Mortality Rates')
# show the figure
plt.savefig(f"plots/{country} {gender} Log-Mortality Rates Over Calendar Years")
plt.show()
In the below plot, we can observe that improvements in mortality over the past decades is more evident in younger ages - particularly of infants and children whereas those in older ages have experienced relatively less mortality improvements.
plot_data_t = plot_data.transpose()
# setup the normalization and the colormap
normalize = mcolors.Normalize(vmin=plot_data_t.columns.min(), vmax=plot_data_t.columns.max())
colormap = cm.RdBu
# plot
for n in plot_data_t.columns:
plt.plot(plot_data_t[n], color=colormap(normalize(n)))
# setup the colorbar
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array(plot_data_t.columns)
plt.colorbar(scalarmappaple)
# add axis labs
plt.title(f"{country} {gender} Log-Mortality Rates Over Ages")
plt.xlabel('Calendar Years')
plt.ylabel('Log-Mortality Rates')
# show the figure
plt.savefig(f"plots/{country} {gender} Log-Mortality Rates Over Ages")
plt.show()
We then split the data into approximately 60% training, 20% validation and 20% test set, split by calendar year. The training set will contain only the observations prioer to the ones from the test set so that no future observations are used in constructing the forecast. This prevents any leaks that may cause a bias in estimation which leads to deviations in prediction error while applying the model to new unseen data.
# find year to split data at
minYear = gender_mort.Year.min()
maxYear = gender_mort.Year.max()
interval = (maxYear - minYear)/10
traincut = int(minYear + interval * 6)
valcut = int(minYear + interval * 8)
trainall = all_mort[all_mort['Year'] <= traincut]
valall = all_mort[(all_mort['Year'] > traincut) & (all_mort['Year'] <= valcut)]
testall = all_mort[all_mort['Year']> valcut]
print(f"trainall shape: {trainall.shape}, min year: {trainall.Year.min()}, max year: {trainall.Year.max()}, number of years: {trainall.Year.nunique()}")
print(f"valall shape: {valall.shape}, min year: {valall.Year.min()}, max year: {valall.Year.max()}, number of years: {valall.Year.nunique()}")
print(f"testall shape: {testall.shape}, min year: {testall.Year.min()}, max year: {testall.Year.max()}, number of years: {testall.Year.nunique()}")
trainall shape: (11800, 5), min year: 1921, max year: 1979, number of years: 59 valall shape: (4000, 5), min year: 1980, max year: 1999, number of years: 20 testall shape: (4000, 5), min year: 2000, max year: 2019, number of years: 20
# select gender from train
trainfemale = trainall[trainall["Gender"]== "Female"].drop(columns = "Gender")
valfemale = valall[valall["Gender"]== "Female"].drop(columns = "Gender")
testfemale = testall[testall["Gender"]== "Female"].drop(columns = "Gender")
trainmale = trainall[trainall["Gender"]== "Male"].drop(columns = "Gender")
valmale = valall[valall["Gender"]== "Male"].drop(columns = "Gender")
testmale = testall[testall["Gender"]== "Male"].drop(columns = "Gender")
The model calculates the logarithm of the central death rate $log(m_{x,t})$ at age $x$ in the calendar year $t$ as: $$\log(m_{x,t}) = \mathbb{a}_x + b_x k_t + e_{x,t} \tag{$1$} $$ We will calculate the parameters from first principles - methodology can be found in the report appendix
def LCParameters(train):
#Step 1
a_x = train.groupby('Age')['logmx'].mean()
# Step 2
train = train.assign(
a_x = train.groupby(by = 'Age')['logmx'].transform('mean'),
mx_adj = lambda x: x['logmx'] - x['a_x']
)
rates_mat = pd.pivot_table(train, values = 'mx_adj', index = 'Age', columns = 'Year')
# Step 3
u, s, vT = svd(rates_mat)
# Step 4
b_x = u[:,0] * s[0]
k_t = vT.transpose()[:,0]
# Step 5
c1 = k_t.sum()
c2 = b_x.sum()
a_x = a_x + c1 * b_x
b_x = b_x / c2
k_t = (k_t - c1) * c2
return a_x, b_x, k_t
a_xF, b_xF, k_tF = LCParameters(trainfemale)
a_xM, b_xM, k_tM = LCParameters(trainmale)
Plots of our parameters reflect much of the observed trends in our exploratory data analysis.
def plotLCParams(a_x, b_x, k_t, train, G):
fig, (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(15,5))
plt.suptitle(f"{country} {G} LC Parameter Estimation")
ax1.plot(a_x)
ax1.set_xlabel('Age x')
ax1.set_title('a_x vs age')
ax2.plot(b_x)
ax2.set_xlabel('Age x')
ax2.set_title('b_x vs x')
ax3.plot(train.Year.unique(),k_t)
ax3.set_xlabel('Calendar Year t')
ax3.set_title('k_t vs t')
plt.savefig(f"plots/{country} {G} LC Parameter Estimation")
plotLCParams(a_xF, b_xF,k_tF, trainfemale, "Female")
plotLCParams(a_xM, b_xM,k_tM, trainmale, "Male")
As $a_x$ and $b_x$ are age-dependent rather than time-dependent, it is assumed they are constant over time. Hence, for our forecast, $k_t$ is the only parameter required to be extrapolated.
This is achieved through projection as a random walk with drift by modelling $k_t$ as an independent ARIMA(0,1,0) process. $$k_t = k_{t-1} + \gamma + e_t$$ where $e_t ~ N(0,\sigma_\epsilon^2)$ and $\gamma$ is the drift
def LCForecast(train, val, k_t, G):
# fit ARIMA(0,1,0) to k_t
model = ARIMA(k_t, order=(0,1,0), trend = "t")
model_fit = model.fit()
# forecast k_t and confidence intervals
forecast = model_fit.get_forecast(steps=len(val.Year.unique()))
k_t_forecast = forecast.predicted_mean
k_t_forecast_ci = forecast.conf_int(alpha = 0.05)
# plot training and forecasted values
ax4 = plt.subplot(1,1,1)
ax4.plot(train.Year.unique(), k_t, label=f'{G} Train')
ax4.plot(val.Year.unique(), k_t_forecast, label=f'{G} Forecast (95% CI)')
ax4.fill_between(val.Year.unique(), k_t_forecast_ci[:,0], k_t_forecast_ci[:,1], alpha=.25)
ax4.set_xlabel('Calendar Year t')
ax4.set_ylabel('k_t')
ax4.legend(loc='upper right')
return k_t_forecast
k_t_forecastF = LCForecast(trainfemale, valfemale, k_tF, "Female")
k_t_forecastM = LCForecast(trainmale, valmale, k_tM, "Male")
plt.title(f"{country} Estimated Process k_t")
plt.savefig(f"plots/{country} Estimated Process k_t")
def fitLC(data, a_x, b_x, k_t):
# calculate log(mx) = ax + b_x * k_t
fitted = np.array([a_x]) + np.dot(np.array([k_t]).T, np.array([b_x]))
# change to dataframe
fitted_df = pd.DataFrame(fitted)
# add column names
fitted_df.columns = data.Age.unique()
# add Year column
fitted_df['Year'] = data.Year.unique()
# melt the dataframe
fitted_dfmelt = fitted_df.melt(id_vars=['Year'], value_name='pred_logmx', var_name='Age')
# calculate mx = exp(log(mx))
fitted_dfmelt['pred_mx'] = fitted_dfmelt['pred_logmx'].apply(lambda x: np.exp(x))
# merge train and fitted data
merge = pd.merge(data, fitted_dfmelt, on=['Year', 'Age'])
# add residuals column
merge['residuals'] = merge['pred_logmx'] - merge['logmx']
return merge
LC_trainF = fitLC(trainfemale, a_xF, b_xF, k_tF)
LC_valF = fitLC(valfemale, a_xF, b_xF, k_t_forecastF)
LC_trainM = fitLC(trainmale, a_xM, b_xM, k_tM)
LC_valM = fitLC(valmale, a_xM, b_xM, k_t_forecastM)
Plots of prediction performance reveal that our Lee Carter model performs quite well with most of the predictions lying on a 45 degree line in a plot against the actual values. Residuals also to be close to zero, however it should be noted that residuals tend to be larger when predicting higher mortality rates.
plotPredictions(LC_trainM, 'LC', 'Training', "Male")
plotPredictions(LC_valM, 'LC', 'Validation', "Male")
plotPredictions(LC_trainF, 'LC', 'Training', "Female")
plotPredictions(LC_valF, 'LC', 'Validation', "Female")
residualsHeatmap(LC_trainF, 'LC', 'Training', 'Female')
residualsHeatmap(LC_valF, 'LC', 'Validation', 'Female')
residualsHeatmap(LC_trainM, 'LC', 'Training', 'Male')
residualsHeatmap(LC_valM, 'LC', 'Validation', 'Male')
Calculation of MSE
The mean squared error was then calculated between the Lee Carter model's estimates of $m_x$ and the raw data's $m_x$. This serves as the baseline for our neural networks to beat.
mseLCTrainF = mean_squared_error(LC_trainF['mx'], LC_trainF['pred_mx'])
mseLCValF = mean_squared_error(LC_valF['mx'], LC_valF['pred_mx'])
mseLCTrainM = mean_squared_error(LC_trainM['mx'], LC_trainM['pred_mx'])
mseLCValM = mean_squared_error(LC_valM['mx'], LC_valM['pred_mx'])
mseTrain = {"Lee Carter": [mseLCTrainF, mseLCTrainM, (mseLCTrainF + mseLCTrainM)/2]}
mseVal = {"Lee Carter": [mseLCValF, mseLCValM, (mseLCValF + mseLCValM)/2]}
#Source: https://mobiarch.wordpress.com/2020/11/13/preparing-time-series-data-for-rnn-in-tensorflow/
def timeseries_dataset_one_step(features, labels, input_sequence_length):
return tf.keras.preprocessing.timeseries_dataset_from_array(features[:-1], \
np.roll(labels, -input_sequence_length, axis=0)[:-1], \
input_sequence_length)
def GRUDataProcessing(data, T0):
xmat = np.zeros((1, T0, 1))
ymat = np.zeros((1, 1))
YAmat = np.zeros((1, 2))
for age in range(data.Age.max()):
inputdata = data[data['Age'] == age].drop(columns = ["mx"])
# Predict the next year ahead.
ahead = 1
# The index of the first target.
delay = (T0+ahead-1)
DS = \
timeseries_dataset_one_step(
inputdata,
inputdata,
T0
)
x_age = np.concatenate(list(DS.map(lambda x, y: x)))
y_age = np.concatenate(list(DS.map(lambda x, y: y)))
# append the data to the matrices
xmat= np.append(xmat, x_age[:, :, 2:], axis = 0)
ymat = np.append(ymat, y_age[:, 2:], axis= 0)
YAmat = np.append(YAmat, y_age[:, 0:2], axis= 0)
# remove the first row of zeros (used for initialization)
xmat = xmat[1:]
ymat = ymat[1:]
YAmat = YAmat[1:]
return xmat, ymat, YAmat
# set parameters
T0 = 10 # lookback period
# data processing
x_trainF, y_trainF, trainF_AY = GRUDataProcessing(trainfemale, T0)
x_trainM, y_trainM, trainM_AY = GRUDataProcessing(trainmale, T0)
x_valF, y_valF, valF_AY = GRUDataProcessing(valfemale, T0)
x_valM, y_valM, valM_AY = GRUDataProcessing(valmale, T0)
x_testF, y_testF, testF_AY = GRUDataProcessing(testfemale, T0)
x_testM, y_testM, testM_AY = GRUDataProcessing(testmale, T0)
def combinedata(xdataF, ydataF, xdataM, ydataM, F_AY, M_AY):
numRows = xdataF.shape[0]
genderIndicator = np.tile([0,1], numRows) #note, 0 is Female
yDATA = np.zeros((numRows * 2, 1))
xDATA = np.zeros((numRows * 2, T0, 1))
AY = np.zeros((numRows * 2, 2))
for l in range(numRows):
xDATA[l*2] = xdataF[l]
xDATA[l*2 + 1] = xdataM[l]
yDATA[l*2] = ydataF[l]
yDATA[l*2 + 1] = ydataM[l]
AY[l*2] = F_AY[l]
AY[l*2 + 1] = M_AY[l]
return xDATA, yDATA, genderIndicator, AY
x_train, y_train, genderIndicator_train, AY_train = combinedata(x_trainF, y_trainF, x_trainM, y_trainM, trainF_AY, trainM_AY)
x_val, y_val, genderIndicator_val, AY_val = combinedata(x_valF, y_valF, x_valM, y_valM, valF_AY, valM_AY)
x_test, y_test, genderIndicator_test, AY_test = combinedata(x_testF, y_testF, x_testM, y_testM, testF_AY, testM_AY)
# scale the data
x_min = x_train.min(axis = 0).min(axis = 0).min(axis = 0)
x_max = x_train.max(axis = 0).max(axis = 0).max(axis = 0)
f = lambda x: 2 * (x - x_min) / (x_min - x_max) - 1
x_train= f(x_train)
x_val = f(x_val)
x_test = f(x_test)
# append gender
x_train = [x_train, genderIndicator_train]
x_val = [x_val, genderIndicator_val]
x_test = [x_test, genderIndicator_test]
def model_builder(hp):
layers = hp.Int('layers', 1,3)
activation = hp.Choice('activation', ['relu', 'tanh'])
learning_rate = hp.Choice('learning_rate', [0.001, 0.01])
inp = Input(shape=(T0, 1), name = "input")
gender = Input(shape = (1), name = "gender")
x = inp
for i in range(layers):
x = GRU(units = hp.Int(f'neurons_{i}', min_value = 5, max_value = 20, step = 5), activation = activation, return_sequences = True, name = f"GRU{i+1}")(x)
x = GRU(units = hp.Int(f'final_neurons', min_value = 5, max_value = 20, step = 5), activation = activation, name = f"FinalGRU")(x)
concat = Concatenate(name = "combined")([x, gender])
output = Dense(1, name = "Output", activation = "linear")(concat)
modelGRU = Model(inputs = [inp, gender], outputs = [output])
modelGRU.compile(loss="mse", optimizer=Adam(learning_rate = learning_rate))
return modelGRU
tuner = kt.BayesianOptimization(
model_builder,
objective = 'val_loss',
max_trials = 20,
overwrite = True,
seed = 17,
directory = "BOGRU10")
tuner.search_space_summary()
Search space summary
Default search space size: 5
layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': None}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
learning_rate (Choice)
{'default': 0.001, 'conditions': [], 'values': [0.001, 0.01], 'ordered': True}
neurons_0 (Int)
{'default': None, 'conditions': [], 'min_value': 5, 'max_value': 20, 'step': 5, 'sampling': None}
final_neurons (Int)
{'default': None, 'conditions': [], 'min_value': 5, 'max_value': 20, 'step': 5, 'sampling': None}
es = EarlyStopping(patience=10, restore_best_weights=True, verbose=1)
tuner.search(
x_train, y_train,
batch_size = 20,
epochs = 100,
validation_data = (x_val, y_val),
callbacks = [es],
verbose = 1)
Trial 20 Complete [00h 04m 54s] val_loss: 0.023385172709822655 Best val_loss So Far: 0.01834750920534134 Total elapsed time: 00h 38m 42s INFO:tensorflow:Oracle triggered exit
tuner.results_summary()
Results summary Results in BOGRU10\untitled_project Showing 10 best trials <keras_tuner.engine.objective.Objective object at 0x000002C75C599D30> Trial summary Hyperparameters: layers: 1 activation: relu learning_rate: 0.001 neurons_0: 20 final_neurons: 20 neurons_1: 20 neurons_2: 20 Score: 0.01834750920534134 Trial summary Hyperparameters: layers: 1 activation: relu learning_rate: 0.01 neurons_0: 5 final_neurons: 20 neurons_1: 5 neurons_2: 5 Score: 0.01844698004424572 Trial summary Hyperparameters: layers: 1 activation: tanh learning_rate: 0.01 neurons_0: 20 final_neurons: 20 neurons_1: 5 neurons_2: 5 Score: 0.018692746758461 Trial summary Hyperparameters: layers: 1 activation: relu learning_rate: 0.001 neurons_0: 20 final_neurons: 20 neurons_1: 5 neurons_2: 20 Score: 0.01870134472846985 Trial summary Hyperparameters: layers: 1 activation: relu learning_rate: 0.001 neurons_0: 5 final_neurons: 20 neurons_1: 5 neurons_2: 20 Score: 0.020406166091561317 Trial summary Hyperparameters: layers: 3 activation: tanh learning_rate: 0.001 neurons_0: 5 final_neurons: 20 neurons_1: 5 neurons_2: 20 Score: 0.02051377110183239 Trial summary Hyperparameters: layers: 1 activation: tanh learning_rate: 0.01 neurons_0: 20 final_neurons: 20 neurons_1: 20 neurons_2: 20 Score: 0.02072201296687126 Trial summary Hyperparameters: layers: 1 activation: relu learning_rate: 0.001 neurons_0: 20 final_neurons: 15 neurons_1: 10 neurons_2: 5 Score: 0.021342691034078598 Trial summary Hyperparameters: layers: 1 activation: relu learning_rate: 0.001 neurons_0: 20 final_neurons: 5 neurons_1: 5 neurons_2: 20 Score: 0.021426972001791 Trial summary Hyperparameters: layers: 1 activation: tanh learning_rate: 0.001 neurons_0: 5 final_neurons: 20 neurons_1: 5 neurons_2: 5 Score: 0.021775659173727036
best_params = tuner.get_best_hyperparameters()
best_params[0].values
{'layers': 1,
'activation': 'relu',
'learning_rate': 0.001,
'neurons_0': 20,
'final_neurons': 20,
'neurons_1': 20,
'neurons_2': 20}
bestGRUModel = tuner.get_best_models()[0]
bestGRUModel.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input (InputLayer) [(None, 10, 1)] 0 []
GRU1 (GRU) (None, 10, 20) 1380 ['input[0][0]']
FinalGRU (GRU) (None, 20) 2520 ['GRU1[0][0]']
gender (InputLayer) [(None, 1)] 0 []
combined (Concatenate) (None, 21) 0 ['FinalGRU[0][0]',
'gender[0][0]']
Output (Dense) (None, 1) 22 ['combined[0][0]']
==================================================================================================
Total params: 3,922
Trainable params: 3,922
Non-trainable params: 0
__________________________________________________________________________________________________
plot_model(bestGRUModel, show_shapes=True)
hist = bestGRUModel.fit(x = x_train, y = y_train, epochs = 100, validation_data = (x_val, y_val), callbacks = [es])
Epoch 1/100 304/304 [==============================] - 4s 8ms/step - loss: 0.0166 - val_loss: 0.0190 Epoch 2/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0171 - val_loss: 0.0213 Epoch 3/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0173 - val_loss: 0.0201 Epoch 4/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0163 - val_loss: 0.0202 Epoch 5/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0163 - val_loss: 0.0248 Epoch 6/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0170 - val_loss: 0.0184 Epoch 7/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0165 - val_loss: 0.0195 Epoch 8/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0160 - val_loss: 0.0203 Epoch 9/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0167 - val_loss: 0.0469 Epoch 10/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0172 - val_loss: 0.0408 Epoch 11/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0165 - val_loss: 0.0175 Epoch 12/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0161 - val_loss: 0.0298 Epoch 13/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0169 - val_loss: 0.0297 Epoch 14/100 304/304 [==============================] - 2s 8ms/step - loss: 0.0169 - val_loss: 0.0199 Epoch 15/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0161 - val_loss: 0.0239 Epoch 16/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0161 - val_loss: 0.0277 Epoch 17/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0168 - val_loss: 0.0181 Epoch 18/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0161 - val_loss: 0.0176 Epoch 19/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0157 - val_loss: 0.0189 Epoch 20/100 304/304 [==============================] - 2s 7ms/step - loss: 0.0165 - val_loss: 0.0254 Epoch 21/100 297/304 [============================>.] - ETA: 0s - loss: 0.0180Restoring model weights from the end of the best epoch: 11. 304/304 [==============================] - 2s 7ms/step - loss: 0.0180 - val_loss: 0.0200 Epoch 21: early stopping
plot_train_history(hist, "GRU Model")
<Figure size 432x288 with 0 Axes>
GRU_train = nnPredictions(bestGRUModel, x_train, y_train.flatten())
GRU_train = pd.concat([pd.DataFrame(GRU_train), pd.DataFrame(AY_train, columns = ['Year','Age']).astype('int'), pd.DataFrame(genderIndicator_train, columns = ['Gender'])], axis = 1)
GRU_val = nnPredictions(bestGRUModel, x_val, y_val.flatten())
GRU_val = pd.concat([pd.DataFrame(GRU_val), pd.DataFrame(AY_val, columns = ['Year','Age']).astype('int'),pd.DataFrame(genderIndicator_val, columns = ['Gender'])], axis = 1)
GRU_trainF = GRU_train[GRU_train['Gender'] == 0]
GRU_valF = GRU_val[GRU_val['Gender'] == 0]
GRU_trainM = GRU_train[GRU_train['Gender'] == 1]
GRU_valM = GRU_val[GRU_val['Gender'] == 1]
304/304 [==============================] - 1s 3ms/step 62/62 [==============================] - 0s 2ms/step
plotPredictions(GRU_train, 'GRU','Training', 'Combined')
plotPredictions(GRU_val, 'GRU','Validation', 'Combined')
plotPredictions(GRU_trainF, 'GRU','Training', 'Female')
plotPredictions(GRU_valF, 'GRU','Validation', 'Female')
plotPredictions(GRU_trainM, 'GRU','Training', 'Male')
plotPredictions(GRU_valM, 'GRU','Validation', 'Male')
residualsHeatmap(GRU_trainF, 'GRU', 'Training', 'Female')
residualsHeatmap(GRU_valF, 'GRU', 'Validation', 'Female')
residualsHeatmap(GRU_trainM, 'GRU', 'Training', 'Male')
residualsHeatmap(GRU_valF, 'GRU', 'Validation', 'Male')
residualsHeatmap(GRU_train, 'GRU', 'Training', 'Combined')
residualsHeatmap(GRU_val, 'GRU', 'Validation', 'Combined')
mseGRUTrainF = mean_squared_error(GRU_trainF['mx'], GRU_trainF['pred_mx'])
mseGRUValF = mean_squared_error(GRU_valF['mx'], GRU_valF['pred_mx'])
mseGRUTrainM = mean_squared_error(GRU_trainM['mx'], GRU_trainM['pred_mx'])
mseGRUValM = mean_squared_error(GRU_valM['mx'], GRU_valM['pred_mx'])
mseGRUTrain = mean_squared_error(GRU_train['mx'], GRU_train['pred_mx'])
mseGRUVal = mean_squared_error(GRU_val['mx'], GRU_val['pred_mx'])
mseTrain["GRU"] = [mseGRUTrainF, mseGRUTrainM, mseGRUTrain]
mseVal["GRU"] = [mseGRUValF, mseGRUValM, mseGRUVal]
#separate features and targets
X_train = trainall.drop(columns = ['mx', 'logmx'])
y_train = trainall.logmx
X_val = valall.drop(columns = ['mx', 'logmx'])
y_val = valall.logmx
NUM_AGE, NUM_GENDER = X_train.nunique()[["Age", "Gender"]]
ct = make_column_transformer(
(OrdinalEncoder(),["Gender"]),
(StandardScaler(),["Year"]),
remainder = "passthrough"
)
X_train_ct = ct.fit_transform(X_train)
X_val_ct = ct.transform(X_val)
X_train_gender = X_train_ct[:,0]
X_val_gender = X_val_ct[:,0]
X_train_year = X_train_ct[:,1]
X_val_year = X_val_ct[:,1]
X_train_age = X_train_ct[:,2]
X_val_age = X_val_ct[:,2]
x_train = [X_train_age, X_train_gender, X_train_year]
x_val = [X_val_age, X_val_gender, X_val_year]
def model_builder(hp):
layers = hp.Int('layers', 1, 4)
activation = hp.Choice('activation', ['relu', 'tanh'])
dropout = hp.Float('dropout', 0.0, 0.5, step = 0.1)
gender_embedding = hp.Choice('gender_embedding', [2,3,5])
age_embedding = hp.Choice('age_embedding', [2,3,5])
age = Input(shape = [1], name = "age")
gender = Input(shape = [1], name = "gender")
year = Input(shape = [1], name = "year")
ageEE = Embedding(input_dim = NUM_AGE, output_dim = age_embedding, name = "ageEE")(age)
ageEE = Flatten()(ageEE)
genderEE = Embedding(input_dim = NUM_GENDER, output_dim = gender_embedding, name = "genderEE")(gender)
genderEE = Flatten()(genderEE)
features = Concatenate(name = "Combined")([ageEE, genderEE, year])
x = features
for i in range(layers):
x = Dense(units = hp.Float(f'neurons{i}', 10, 100, step = 10), activation = activation , name = f"hidden{i}")(x)
x = BatchNormalization()(x)
x = Dropout(dropout)(x)
concat = Concatenate(name = "combined")([x, features])
out = Dense(1, activation = "linear", name = "out")(concat)
DenseModel = Model([age, gender, year], out)
DenseModel.compile(optimizer = "adam", loss = "mse")
return DenseModel
tuner = kt.BayesianOptimization(
model_builder,
objective = 'val_loss',
max_trials = 20,
overwrite = True,
seed = 17,
directory = "BODense")
es = EarlyStopping(patience=10, restore_best_weights=True, verbose=1)
tuner.search(
x_train, y_train,
epochs = 100,
validation_data = (x_val, y_val),
callbacks = [es],
verbose = 1)
Trial 20 Complete [00h 00m 17s] val_loss: 0.04652658849954605 Best val_loss So Far: 0.030326131731271744 Total elapsed time: 00h 07m 02s INFO:tensorflow:Oracle triggered exit
tuner.results_summary()
Results summary Results in BODense\untitled_project Showing 10 best trials <keras_tuner.engine.objective.Objective object at 0x000002C75C182340> Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 5 neurons0: 10.0 neurons1: 10.0 neurons2: 100.0 neurons3: 10.0 Score: 0.030326131731271744 Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 2 neurons0: 10.0 neurons1: 10.0 neurons2: 100.0 neurons3: 10.0 Score: 0.03153066337108612 Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 2 neurons0: 100.0 neurons1: 10.0 neurons2: 100.0 neurons3: 10.0 Score: 0.03298342600464821 Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 2 age_embedding: 2 neurons0: 10.0 neurons1: 10.0 neurons2: 100.0 neurons3: 10.0 Score: 0.036406874656677246 Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 2 age_embedding: 5 neurons0: 60.0 neurons1: 10.0 neurons2: 100.0 neurons3: 10.0 Score: 0.03853404521942139 Trial summary Hyperparameters: layers: 2 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 2 neurons0: 40.0 neurons1: 10.0 neurons2: 100.0 neurons3: 10.0 Score: 0.03859080746769905 Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 2 neurons0: 30.0 neurons1: 10.0 neurons2: 10.0 neurons3: 10.0 Score: 0.045110415667295456 Trial summary Hyperparameters: layers: 2 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 5 neurons0: 100.0 neurons1: 10.0 neurons2: 100.0 neurons3: 10.0 Score: 0.045257631689310074 Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 2 neurons0: 100.0 neurons1: 70.0 neurons2: 10.0 neurons3: 10.0 Score: 0.04649074003100395 Trial summary Hyperparameters: layers: 4 activation: relu dropout: 0.0 gender_embedding: 5 age_embedding: 5 neurons0: 10.0 neurons1: 100.0 neurons2: 10.0 neurons3: 100.0 Score: 0.04652658849954605
bestDenseModel = tuner.get_best_models()[0]
bestDenseModel.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
age (InputLayer) [(None, 1)] 0 []
gender (InputLayer) [(None, 1)] 0 []
ageEE (Embedding) (None, 1, 5) 500 ['age[0][0]']
genderEE (Embedding) (None, 1, 5) 10 ['gender[0][0]']
flatten (Flatten) (None, 5) 0 ['ageEE[0][0]']
flatten_1 (Flatten) (None, 5) 0 ['genderEE[0][0]']
year (InputLayer) [(None, 1)] 0 []
Combined (Concatenate) (None, 11) 0 ['flatten[0][0]',
'flatten_1[0][0]',
'year[0][0]']
hidden0 (Dense) (None, 10) 120 ['Combined[0][0]']
batch_normalization (BatchNorm (None, 10) 40 ['hidden0[0][0]']
alization)
dropout (Dropout) (None, 10) 0 ['batch_normalization[0][0]']
hidden1 (Dense) (None, 10) 110 ['dropout[0][0]']
batch_normalization_1 (BatchNo (None, 10) 40 ['hidden1[0][0]']
rmalization)
dropout_1 (Dropout) (None, 10) 0 ['batch_normalization_1[0][0]']
hidden2 (Dense) (None, 100) 1100 ['dropout_1[0][0]']
batch_normalization_2 (BatchNo (None, 100) 400 ['hidden2[0][0]']
rmalization)
dropout_2 (Dropout) (None, 100) 0 ['batch_normalization_2[0][0]']
hidden3 (Dense) (None, 10) 1010 ['dropout_2[0][0]']
batch_normalization_3 (BatchNo (None, 10) 40 ['hidden3[0][0]']
rmalization)
dropout_3 (Dropout) (None, 10) 0 ['batch_normalization_3[0][0]']
combined (Concatenate) (None, 21) 0 ['dropout_3[0][0]',
'Combined[0][0]']
out (Dense) (None, 1) 22 ['combined[0][0]']
==================================================================================================
Total params: 3,392
Trainable params: 3,132
Non-trainable params: 260
__________________________________________________________________________________________________
plot_model(bestDenseModel, show_shapes=True)
best_params = tuner.get_best_hyperparameters()
best_params[0].values
{'layers': 4,
'activation': 'relu',
'dropout': 0.0,
'gender_embedding': 5,
'age_embedding': 5,
'neurons0': 10.0,
'neurons1': 10.0,
'neurons2': 100.0,
'neurons3': 10.0}
es = EarlyStopping(patience=10, restore_best_weights=True, verbose=1)
hist = bestDenseModel.fit(
(X_train_age, X_train_gender, X_train_year),
y_train,
epochs = 100,
verbose = 1,
callbacks = [es],
validation_data = ((X_val_age, X_val_gender, X_val_year), y_val)
)
Epoch 1/100 369/369 [==============================] - 1s 2ms/step - loss: 0.0221 - val_loss: 0.0348 Epoch 2/100 369/369 [==============================] - 1s 1ms/step - loss: 0.0222 - val_loss: 0.0384 Epoch 3/100 369/369 [==============================] - 1s 1ms/step - loss: 0.0223 - val_loss: 0.0340 Epoch 4/100 369/369 [==============================] - 1s 1ms/step - loss: 0.0213 - val_loss: 0.0426 Epoch 5/100 369/369 [==============================] - 1s 1ms/step - loss: 0.0212 - val_loss: 0.0342 Epoch 6/100 369/369 [==============================] - 1s 2ms/step - loss: 0.0206 - val_loss: 0.0380 Epoch 7/100 369/369 [==============================] - 1s 2ms/step - loss: 0.0213 - val_loss: 0.0392 Epoch 8/100 369/369 [==============================] - 1s 2ms/step - loss: 0.0217 - val_loss: 0.0440 Epoch 9/100 369/369 [==============================] - 1s 2ms/step - loss: 0.0217 - val_loss: 0.0363 Epoch 10/100 369/369 [==============================] - 1s 1ms/step - loss: 0.0210 - val_loss: 0.0363 Epoch 11/100 369/369 [==============================] - 1s 1ms/step - loss: 0.0207 - val_loss: 0.0499 Epoch 12/100 369/369 [==============================] - 1s 1ms/step - loss: 0.0208 - val_loss: 0.0476 Epoch 13/100 360/369 [============================>.] - ETA: 0s - loss: 0.0208Restoring model weights from the end of the best epoch: 3. 369/369 [==============================] - 1s 2ms/step - loss: 0.0208 - val_loss: 0.0408 Epoch 13: early stopping
loss = hist.history['loss']
val_loss = hist.history['val_loss']
epochs = range(len(loss))
plt.figure()
plt.plot(epochs, loss, label='Training loss')
plt.plot(epochs, val_loss, label='Validation loss')
plt.xlabel('epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.show()
Dense_train = nnPredictions(bestDenseModel, x_train, y_train)
Dense_train = pd.concat([Dense_train, X_train['Age'].astype('int'), X_train['Year'].astype('int'), X_train['Gender']], axis = 1)
Dense_val = nnPredictions(bestDenseModel, x_val, y_val)
Dense_val = pd.concat([Dense_val, X_val['Age'].astype('int'), X_val['Year'].astype('int'), X_val['Gender']], axis = 1)
# split by Gender
Dense_trainF = Dense_train[Dense_train['Gender'] == 'Female']
Dense_valF = Dense_val[Dense_val['Gender'] == 'Female']
Dense_trainM = Dense_val[Dense_val['Gender'] == 'Male']
Dense_valM = Dense_val[Dense_val['Gender'] == 'Male']
369/369 [==============================] - 0s 611us/step 125/125 [==============================] - 0s 711us/step
plotPredictions(Dense_train, 'Dense','Training', 'Combined')
plotPredictions(Dense_val, 'Dense', 'Validation', 'Combined')
plotPredictions(Dense_trainF, 'Dense','Training', 'Female')
plotPredictions(Dense_valF, 'Dense', 'Validation', 'Female')
plotPredictions(Dense_trainM, 'Dense','Training', 'Male')
plotPredictions(Dense_valM, 'Dense', 'Validation', 'Male')
residualsHeatmap(Dense_trainF, 'Dense', 'Training', 'Female')
residualsHeatmap(Dense_valF, 'Dense', 'Validation', 'Female')
residualsHeatmap(Dense_trainM, 'Dense', 'Training', 'Male')
residualsHeatmap(Dense_valM, 'Dense', 'Validation', 'Male')
residualsHeatmap(Dense_train, 'Dense', 'Training', 'Combined')
residualsHeatmap(Dense_val, 'Dense', 'Validation', 'Combined')
mseDenseTrainF = mean_squared_error(Dense_trainF['mx'], Dense_trainF['pred_mx'])
mseDenseValF = mean_squared_error(Dense_valF['mx'], Dense_valF['pred_mx'])
mseDenseTrainM = mean_squared_error(Dense_trainM['mx'], Dense_trainM['pred_mx'])
mseDenseValM = mean_squared_error(Dense_valM['mx'], Dense_valM['pred_mx'])
mseDenseTrain = mean_squared_error(Dense_train['mx'], Dense_train['pred_mx'])
mseDenseVal = mean_squared_error(Dense_val['mx'], Dense_val['pred_mx'])
mseTrain["Dense"] = [mseDenseTrainF, mseDenseTrainM, mseDenseTrain]
mseVal["Dense"] = [mseDenseValF, mseDenseValM, mseDenseVal]
trainResults = pd.DataFrame(mseTrain).transpose()
trainResults.columns = ["Female", "Male", "Both"]
valResults = pd.DataFrame(mseVal).transpose()
valResults.columns = ["Female", "Male", "Both"]
trainResults
| Female | Male | Both | |
|---|---|---|---|
| Lee Carter | 0.000435 | 0.000597 | 0.000516 |
| GRU | 0.000273 | 0.000383 | 0.000328 |
| Dense | 0.000479 | 0.000445 | 0.000536 |
valResults
| Female | Male | Both | |
|---|---|---|---|
| Lee Carter | 0.000341 | 0.001184 | 0.000762 |
| GRU | 0.000031 | 0.000087 | 0.000059 |
| Dense | 0.000345 | 0.000445 | 0.000395 |
GRU_test= nnPredictions(bestGRUModel, x_test, y_test.flatten())
GRU_test = pd.concat([pd.DataFrame(GRU_test), pd.DataFrame(AY_test, columns = ['Year','Age']).astype('int'), pd.DataFrame(genderIndicator_test, columns = ['Gender']).astype('int')], axis = 1)
# split by Gender
GRU_testF = GRU_test[GRU_test['Gender'] == 0]
GRU_testM = GRU_test[GRU_test['Gender'] == 1]
62/62 [==============================] - 0s 2ms/step
plotPredictions(GRU_test, 'GRU','Test', 'Combined')
plotPredictions(GRU_testF, 'GRU','Test', 'Female')
plotPredictions(GRU_testM, 'GRU','Test', 'Male')
residualsHeatmap(GRU_test, 'GRU', 'Test', 'Combined')
residualsHeatmap(GRU_testM, 'Dense', 'Test', 'Male')
residualsHeatmap(GRU_testF, 'Dense', 'Test', 'Female')
print(f"GRU Combined Test Error: {mean_squared_error(GRU_test['mx'], GRU_test['pred_mx']):.20f}")
print(f"GRU Female Test Error: {mean_squared_error(GRU_testF['mx'], GRU_testF['pred_mx']):.20f}")
print(f"GRU Male Test Error: {mean_squared_error(GRU_testM['mx'], GRU_testM['pred_mx']):.20f}")
GRU Combined Test Error: 0.00001758021540975981 GRU Female Test Error: 0.00001021911563234223 GRU Male Test Error: 0.00002494131518717738